}, export= ls(),
cpus = 3)
# Get average coherence for each model
coherence_mat <- data.frame(k = sapply(model_list, function(x) nrow(x$phi)),
coherence = sapply(model_list, function(x) mean(x$coherence)),
stringsAsFactors = FALSE)
# Plot the result
# On larger (~1,000 or greater documents) corpora, you will usually get a clear peak
plot(coherence_mat, type = "o")
model_list <- TmParallelApply(X = k_list, FUN = function(k){
m <- FitLdaModel(dtm = dtm2matrix,
k = k,
iterations = 500,
burnin = 100,
alpha = 1/k,
beta = .05,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_coherence = TRUE,
calc_r2 = TRUE)
m$k <- k
m
}, export= ls(),
cpus = 3)
# Get average coherence for each model
coherence_mat <- data.frame(k = sapply(model_list, function(x) nrow(x$phi)),
coherence = sapply(model_list, function(x) mean(x$coherence)),
stringsAsFactors = FALSE)
# Plot the result
# On larger (~1,000 or greater documents) corpora, you will usually get a clear peak
plot(coherence_mat, type = "o")
# choose number of topics
k_list <- seq(2,30, by = 2)
model_list <- TmParallelApply(X = k_list, FUN = function(k){
m <- FitLdaModel(dtm = dtm2matrix,
k = k,
iterations = 500,
burnin = 100,
alpha = 0.1,
beta = .05,
#                   optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_coherence = TRUE,
calc_r2 = TRUE)
m$k <- k
m
}, export= ls(),
cpus = 3)
19/6
9/3
19.50/6
# choose number of topics
k_list <- seq(5,50, by = 2)
model_list <- TmParallelApply(X = k_list, FUN = function(k){
m <- FitLdaModel(dtm = dtm2matrix,
k = k,
iterations = 500,
burnin = 100,
alpha = 0.1,
beta = .05,
#                   optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_coherence = TRUE,
calc_r2 = TRUE)
m$k <- k
m
}, export= ls(),
cpus = 3)
# Get average coherence for each model
coherence_mat <- data.frame(k = sapply(model_list, function(x) nrow(x$phi)),
coherence = sapply(model_list, function(x) mean(x$coherence)),
stringsAsFactors = FALSE)
# Plot the result
# On larger (~1,000 or greater documents) corpora, you will usually get a clear peak
plot(coherence_mat, type = "o")
ggsave(filename = "output/Figure_A2.pdf",
width = 7, height = 5, dpi = "retina")
# Plot the result
# On larger (~1,000 or greater documents) corpora, you will usually get a clear peak
p1 <- plot(coherence_mat, type = "o")
ggsave(filename = "output/Figure_A2.pdf", plot = p1,
width = 7, height = 5, dpi = "retina")
# Plot the result
pdf("output/Figure_A2.pdf", width = 7, height = 5)
plot(coherence_mat, type = "o")
dev.off()
View(acled_btm)
save(acled_btm, file = "output/acled_btm.Rda")
save(all_protests_sub, file = "output/all_protests_sub.Rda")
# Load protest data
load("output/all_protests_sub.Rda")
# Plot share of Covid-related protests over time
all_protests_monthly <- all_protests_sub %>%
mutate(year_month = yearmonth(event_date)) %>%
group_by(year, year_month) %>% # aggregate at year month level
summarise(monthly_events = n(),
monthly_pandem_events = sum(corona_topic, na.rm = T),
monthly_non_pandem_events = monthly_events - monthly_pandem_events) %>%
mutate(share_pandemic =  (monthly_pandem_events / monthly_events)* 100) %>%
ungroup() %>%
pivot_longer(cols = c(monthly_events:share_pandemic),
names_to = "variable")  %>%
filter(variable != "monthly_events")
# Plot events over time
ggplot(all_protests_monthly %>% filter(variable != "share_pandemic" & year %in% c(2019,2020,2021,2022)),
aes(x = year_month, y = value,
fill = factor(variable))) +
geom_area(alpha = 0.55, color = "black", show.legend = FALSE) +
scale_y_continuous("Protest events", breaks = seq(0,21000,3000)) +
scale_x_yearmonth("", date_breaks = "3 months", date_labels = "%b %Y") +
scale_fill_manual(values= c("grey70", "grey20")) +
theme_bw() +
theme(axis.text.x = element_text(angle=45, hjust=1)) +
geom_vline(xintercept = lubridate::ymd("2020 Feb", truncated = 1), lty = "dotted") +
geom_vline(xintercept = lubridate::ymd("2020 Apr", truncated = 1), lty = "dotted") +
geom_vline(xintercept = lubridate::ymd("2020 Sep", truncated = 1), lty = "dotted") +
geom_vline(xintercept = lubridate::ymd("2021 May", truncated = 1), lty = "dotted") +
geom_vline(xintercept = lubridate::ymd("2021 Nov", truncated = 1), lty = "dotted") +
annotate(geom = "text", x = lubridate::ymd("2020 Jan", truncated = 1), y = 15000, alpha = .75, label = '"Global health emergency"',  angle = 90, size = 4) +
annotate(geom = "text", x = lubridate::ymd("2020 Mar", truncated = 1), y = 14000, alpha = .75, label = "Global cases exceed 1 Mio.", angle = 90, size = 4) +
annotate(geom = "text", x = lubridate::ymd("2020 Aug", truncated = 1), y = 8000, alpha = .75, label = "Global deaths reach 1 Mio.", angle = 90, size = 4) +
annotate(geom = "text", x = lubridate::ymd("2021 Apr", truncated = 1), y = 8000, alpha = .75, label = "Delta variant detected", angle = 90, size = 4) +
annotate(geom = "text", x = lubridate::ymd("2021 Oct", truncated = 1), y = 8000, alpha = .75, label = "Omicron variant detected", angle = 90, size = 4)
# Save plot
ggsave("output/Figure_1.pdf", width = 7, height = 5.5, dpi = "retina")
# Load topic scores
load("output/acled_btm.Rda")
# Aggregate topics at the year-week level
acled_btm_sub <- acled_btm %>%
group_by(year_week) %>%
summarise(across(health_care:education,
~ mean(., na.rm = T))) %>%
pivot_longer(cols = c(health_care:education), names_to = "topic") %>%
ungroup() %>%
mutate(topic_label = case_when(topic == "economic_consequences" ~ "economy",
topic == "imprisonment_crime" ~ "imprisonment and crime",
topic == "vaccination" ~ "vaccination",
topic == "education" ~ "education",
topic == "health_care" ~ "health care",
topic == "mishandling_corruption" ~ "mismanagement",
topic == "restrictions_masks" ~ "restrictions/masks",
topic == "business_restrictions" ~ "business restrictions",
T ~ NA_character_))
# PLot topic prevalence over time
ggplot(acled_btm_sub ,
aes(x = as.POSIXct(year_week),
y = value,
group = factor(topic_label))) +
#  scale_color_brewer(breaks = c(3,2,1,0),labels = c("liberal dem.", "electoral dem",
#                                "electoral aut.", "closed aut"),type = "qual") +
geom_point(color = "grey", fill = "white", alpha = .65) +
geom_smooth(color = "black", span = .55) +
scale_y_continuous("beta") +
scale_x_yearweek("", date_breaks = "3 months", date_labels = "%b %Y") +
geom_vline(xintercept = as.POSIXct(yearweek("2020 W10")), lty = "dashed" )+
theme(legend.position = "none",
axis.text.x = element_text(angle=45, hjust=1)) +
facet_wrap(~ topic_label, scales = "fixed", ncol = 2)
ggsave("output/Figure_2.pdf", width = 7, height = 6, dpi = "retina")
# at monthly level
acled_btm_sub_mon <- acled_btm_sub %>%
mutate(year_month = yearmonth(year_week)) %>%
group_by(year_month, topic, topic_label) %>%
summarise(mean_value = mean(value, na.rm = T)) %>%
ungroup()
# Plot topics over time in one plot
ggplot(acled_btm_sub_mon,
aes(x = year_month, y = mean_value,
fill = factor(topic_label))) +
geom_area(alpha = 0.55, color = "black") +
scale_y_continuous("Topic share", breaks = seq(0,1,0.2)) +
scale_x_yearmonth("", date_breaks = "3 months", date_labels = "%b %Y") +
scale_fill_manual("",values=met.brewer("OKeeffe1", 8)) +
theme(axis.text.x = element_text(angle=45, hjust=1))
# Select countries to be plotted
countries <- c("Germany",
"United States",
"Brazil",
"United Kingdom",
"Italy",
"South Africa",
"Venezuela",
"India",
"Turkey")
# Reshape data
acled_during_sub <- acled_btm %>%
filter(country %in% countries) %>%
select(country, health_care:education)  %>%
pivot_longer(
cols = c(health_care:education),
names_to = "topic",
values_to = "mean_beta") %>%
group_by(country, topic) %>%
summarise(mean_beta = mean(mean_beta, na.rm = T)) %>%
ungroup() %>%
mutate(topic_label = case_when(topic == "economic_consequences" ~ "economy",
topic == "imprisonment_crime" ~ "imprisonment/crime",
topic == "vaccination" ~ "vaccination",
topic == "education" ~ "education",
topic == "health_care" ~ "health care",
topic == "mishandling_corruption" ~ "mismanagement",
topic == "restrictions_masks" ~ "restrictions/masks",
topic == "business_restrictions" ~ "business restrictions",
T ~ NA_character_)) %>%
mutate(country = factor(country, levels = c("Germany",
"United States",
"Brazil",
"United Kingdom",
"Italy",
"South Africa",
"Venezuela",
"India",
"Turkey")))
ggdotchart(
acled_during_sub, x = "topic_label", y = "mean_beta",
group = "country", color = "topic", palette = "lancet",
add = "segment", position = position_dodge(0.3),
sorting = "descending", facet.by = c("country"),
rotate = TRUE, legend = "none", xlab = "", ylab = "mean beta"
)
ggsave(filename = "output/Figure_3.pdf",
width = 8, height = 5, dpi = "retina")
# Load topic scores
load("output/acled_btm.Rda")
# Load V-Dem data (subset)
vdem_sub <- rio::import("data/vdem_subset.rda")
View(vdem_sub)
packageVersion("vdemdata")
devtools::install_github("vdeminstitute/vdemdata")
install.packages("devtools")
devtools::install_github("vdeminstitute/vdemdata")
renv::init()
library(vdemdata)
packageVersion("vdemdata")
install.packages("devtools")
# Load V-Dem data (subset)
vdem_sub <- rio::import("data/vdem_subset.rda")
View(vdem_sub)
# Load V-Dem data (subset)
vdem_sub <- rio::import("data/vdem_subset.rda")
View(vdem_sub)
acled_topics_vdem <- left_join(acled_btm, vdem_sub, by = c("cowcode", "year")) %>%
select(data_id:corona_topic, max_topic, v2x_polyarchy:v2x_egal, everything())
acled_topics_vdem <- left_join(acled_btm, vdem_sub, by = c("cowcode", "year")) %>%
select(data_id:corona_topic, max_topic, v2x_polyarchy:e_regionpol_6C, everything())
# Aggregtate at region level
acled_regions <- acled_topics_vdem %>%
group_by(e_regionpol_6C) %>%
summarise(across(c("health_care",
"restrictions_masks",
"vaccination",
"imprisonment_crime",
"mishandling_corruption",
"economic_consequences",
"business_restrictions",
"education",
"v2x_libdem"),
~ mean(., na.rm = T))) %>%
mutate(regio = case_when(e_regionpol_6C == 1 ~ "Eastern Europe and Central Asia",
e_regionpol_6C == 2 ~ "Latin America and the Caribbean",
e_regionpol_6C == 3 ~ "Middle East and North Africa",
e_regionpol_6C == 4 ~ "Sub-Saharan Africa",
e_regionpol_6C == 5 ~ "Western Europe and North America",
e_regionpol_6C == 6 ~ "Asia and Pacific",
T ~ NA_character_))
ggplot(acled_regions %>% drop_na(e_regionpol_6C), aes(x = reorder(factor(regio), restrictions_masks), y = restrictions_masks)) +
geom_bar(stat = "identity", alpha  = .85) +
ylab("Restrictions/masks (mean topic prevalence)") +
xlab("") +
coord_flip() +
theme_bw()
ggsave(filename = "output/Figure_4.pdf",
width = 7, height = 5, dpi = "retina")
# aggregate at country-year level
acled_during <- acled_topics_vdem %>%
group_by(country, cowcode, country_text_id, year) %>%
summarise(across(c("health_care",
"restrictions_masks",
"vaccination",
"imprisonment_crime",
"mishandling_corruption",
"economic_consequences",
"business_restrictions",
"education",
"v2x_libdem"),
~ mean(., na.rm = T)),
events = length(data_id)) %>%
mutate(across(c("health_care",
"restrictions_masks",
"vaccination",
"imprisonment_crime",
"mishandling_corruption",
"economic_consequences",
"business_restrictions",
"education",
"v2x_libdem"),
~ .x * log(events),
.names = "{.col}_weighted")) %>%
ungroup() %>%
drop_na(v2x_libdem)
covid_agg <- covid %>%
mutate(cowcode = countrycode::countrycode(iso_code, "iso3c", "cown"),
year = as.numeric(str_sub(date, 1,4), .after = iso_code)) %>%
drop_na(cowcode) %>%
group_by(cowcode, year) %>%
summarise(across(c("stringency_index"),
~ mean(., na.rm = T),
.names = "{.col}_mean"),
across(c("total_deaths_per_million",
"excess_mortality_cumulative_per_million",
"total_vaccinations_per_hundred"),
~ max(., na.rm = T),
.names = "{.col}_max"),
across(c("human_development_index",
"gdp_per_capita",
"hospital_beds_per_thousand",
"life_expectancy",
"population"),
~ first(na.omit(.)),
.names = "{.col}_first")) %>%
ungroup()
# Record missing values
covid_agg[covid_agg == -Inf] <- NA
covid_agg[covid_agg == NaN] <- NA
#merge with other data"
acled_vdem_corona <-  left_join(acled_during, covid_agg) %>%
drop_na(population_first) %>%
mutate(across(c("health_care",
"restrictions_masks",
"imprisonment_crime",
"vaccination",
"mishandling_corruption",
"economic_consequences",
"business_restrictions",
"education"),
~ .x * (events/population_first),
.names = "{.col}_pop_weight")) %>%
mutate(tertiles_hdi = ntile(human_development_index_first, 3)) %>%
mutate(country_year = paste0(country, " (", year, ")"), .after = year)
# aggregate at country-year level
acled_during <- acled_topics_vdem %>%
group_by(country, cowcode, country_text_id, year) %>%
summarise(across(c("health_care",
"restrictions_masks",
"vaccination",
"imprisonment_crime",
"mishandling_corruption",
"economic_consequences",
"business_restrictions",
"education",
"v2x_libdem"),
~ mean(., na.rm = T)),
events = length(data_id)) %>%
mutate(across(c("health_care",
"restrictions_masks",
"vaccination",
"imprisonment_crime",
"mishandling_corruption",
"economic_consequences",
"business_restrictions",
"education",
"v2x_libdem"),
~ .x * log(events),
.names = "{.col}_weighted")) %>%
ungroup() %>%
drop_na(v2x_libdem)
# Add Coviod data
covid <- rio::import("data/owid-covid-data_update.csv")
covid_agg <- covid %>%
mutate(cowcode = countrycode::countrycode(iso_code, "iso3c", "cown"),
year = as.numeric(str_sub(date, 1,4), .after = iso_code)) %>%
drop_na(cowcode) %>%
group_by(cowcode, year) %>%
summarise(across(c("stringency_index"),
~ mean(., na.rm = T),
.names = "{.col}_mean"),
across(c("total_deaths_per_million",
"excess_mortality_cumulative_per_million",
"total_vaccinations_per_hundred"),
~ max(., na.rm = T),
.names = "{.col}_max"),
across(c("human_development_index",
"gdp_per_capita",
"hospital_beds_per_thousand",
"life_expectancy",
"population"),
~ first(na.omit(.)),
.names = "{.col}_first")) %>%
ungroup()
# Record missing values
covid_agg[covid_agg == -Inf] <- NA
covid_agg[covid_agg == NaN] <- NA
#merge with other data"
acled_vdem_corona <-  left_join(acled_during, covid_agg) %>%
drop_na(population_first) %>%
mutate(across(c("health_care",
"restrictions_masks",
"imprisonment_crime",
"vaccination",
"mishandling_corruption",
"economic_consequences",
"business_restrictions",
"education"),
~ .x * (events/population_first),
.names = "{.col}_pop_weight")) %>%
mutate(tertiles_hdi = ntile(human_development_index_first, 3)) %>%
mutate(country_year = paste0(country, " (", year, ")"), .after = year)
# Scatterplots
min_label = .5
events_min = 1
# Development and restrictions
ggplot(acled_vdem_corona %>% filter(events > events_min),
aes(x = human_development_index_first, y =  vaccination))  +
#  geom_smooth(method = "loess", color = "black", alpha = .5) +
geom_point(aes(size = events/population_first), fill = "lightgrey",
colour="black", pch=21, alpha = .5) +
geom_smooth(method = "lm", color= "black", alpha = 0.75) +
xlab("Human Development Index") + ylab("restrictions/masks") +
scale_size(trans = "identity", range = c(2,8), guide = "none") +
ggrepel::geom_text_repel(data =  acled_vdem_corona %>%
filter(country_text_id %in% c("AUT", "CHE", "ESP",
"UGA", "USA", "COD",
"CHN", "RUS", "IND",
"VEN", "ITA", "TUR",
"BRA", "HTI") & year == 2021),
aes(label = country), min.segment.length = 0) +
theme_bw() +
# ggpubr::stat_cor(method = "pearson", label.x = .5, label.y = .6) +
theme(panel.grid.minor = element_blank())
ggsave(filename = "output/Figure_A3.pdf",
width = 7, height = 5, dpi = "retina")
# Development and health care
ggplot(acled_vdem_corona %>% filter(events > events_min),
aes(x = human_development_index_first, y =  health_care)) +
geom_point(aes(size = events/population_first), fill = "lightgrey",
colour="black", pch=21, alpha = .5) +
geom_smooth(method = "lm", color= "black", alpha = 0.75) +
xlab("Human Development Index") + ylab("health care topic") +
scale_size(trans = "identity", range = c(2,8), guide = "none") +
ggrepel::geom_text_repel(data =  acled_vdem_corona %>%
filter(country_text_id %in% c("AUT", "CHE", "ESP",
"UGA", "USA", "COD",
"CHN", "RUS", "IND",
"VEN", "ITA", "TUR",
"BRA", "HTI") & year == 2021),
aes(label = country), min.segment.length = 0) +
theme_bw() +
#  ggpubr::stat_cor(method = "pearson", label.x = .5, label.y = .6) +
theme(panel.grid.minor = element_blank())
acled_vdem_corona %>%
select(health_care:education) %>%
map( ~ lmer(.x ~ I(stringency_index_mean/10) + log(total_deaths_per_million_max) +
I(human_development_index_first*100) +
hospital_beds_per_thousand_first +
I(total_vaccinations_per_hundred_max/10) +
log(population_first) + I(v2x_libdem*10) + log(events) + (1 | country) + (1 | year),
data = acled_vdem_corona)) %>%
map( ~texreg::texreg(.x))
reg_results <- acled_vdem_corona %>%
select(health_care:education) %>%
map( ~ lmer(.x ~ I(stringency_index_mean/10) + log(total_deaths_per_million_max) +
I(human_development_index_first*100) +
hospital_beds_per_thousand_first +
I(total_vaccinations_per_hundred_max/10) +
log(population_first) + I(v2x_libdem*10) + log(events) + (1 | country) + (1 | year),
data = acled_vdem_corona)) %>%
map( ~ broom.mixed::tidy(.x, conf.int = T, conf.level = 0.95)) %>%
bind_rows(.id = "outcome")  %>%
filter(term!= "(Intercept)") %>%
filter(effect== "fixed") %>%
mutate(term = case_when(term == "I(stringency_index_mean/10)" ~ "Stringency index (avg.)",
term == "log(total_deaths_per_million_max)" ~ "Deaths per million (log)",
term == "I(human_development_index_first * 100)" ~ "Human Development Index",
term == "hospital_beds_per_thousand_first" ~ "Hospital beds per thousand",
term == "I(total_vaccinations_per_hundred_max/10)" ~ "Vaccinations per thousand",
term == "log(population_first)" ~ "Population (log)",
term == "I(v2x_libdem * 10)" ~ "Liberal democracy",
term == "log(events)" ~ "Protest events (log)",
T ~ "term")) %>%
mutate(outcome = case_when(outcome == "economic_consequences" ~ "economy",
outcome == "imprisonment_crime" ~ "imprisonment/crime",
outcome == "vaccination" ~ "vaccination",
outcome == "education" ~ "education",
outcome == "health_care" ~ "health care",
outcome == "mishandling_corruption" ~ "mismanagement",
outcome == "restrictions_masks" ~ "restrictions/masks",
outcome == "business_restrictions" ~ "business restrictions",
T ~ NA_character_),
color = case_when(conf.low < 0 & conf.high < 0  ~ "sig",
conf.low > 0 & conf.high > 0  ~ "sig",
T ~ "nosig"),
shape = case_when(conf.low < 0 & conf.high < 0  ~ "sig",
conf.low > 0 & conf.high > 0  ~ "sig",
T ~ "nosig"))
ggplot(reg_results, aes(x = term, y = estimate)) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_point(aes(x = term,
y = estimate, color = color)) +
geom_linerange(aes(x = term,
ymin = conf.low,
ymax = conf.high, color = color),
lwd = 1) +
scale_x_discrete("") +
scale_color_manual(values = c("grey70", "darkblue")) +
scale_y_continuous("Estimate") +
coord_flip() +
theme_bw() +
theme(legend.position = "none") +
facet_wrap(~ outcome, nrow = 4)
# Load V-Dem data (subset)
vdem_sub <- rio::import("data/vdem_subset.rda")
View(vdem_sub)
